{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tutorial on self-normalizing networks on the MNIST data set: multi-layer perceptrons\n",
    "\n",
    "*Author:* Guenter Klambauer, 2017\n",
    "\n",
    "tested under Python 3.5 and Tensorflow 1.1\n",
    "\n",
    "Derived from: [Aymeric Damien](https://github.com/aymericdamien/TensorFlow-Examples/) \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /tmp/data/train-images-idx3-ubyte.gz\n",
      "Extracting /tmp/data/train-labels-idx1-ubyte.gz\n",
      "Extracting /tmp/data/t10k-images-idx3-ubyte.gz\n",
      "Extracting /tmp/data/t10k-labels-idx1-ubyte.gz\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "from __future__ import absolute_import, division, print_function\n",
    "import numbers\n",
    "from tensorflow.contrib import layers\n",
    "from tensorflow.python.framework import ops\n",
    "from tensorflow.python.framework import tensor_shape\n",
    "from tensorflow.python.framework import tensor_util\n",
    "from tensorflow.python.ops import math_ops\n",
    "from tensorflow.python.ops import random_ops\n",
    "from tensorflow.python.ops import array_ops\n",
    "from tensorflow.python.layers import utils\n",
    "\n",
    "\n",
    "# Import MINST data\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "mnist = input_data.read_data_sets(\"/tmp/data/\", one_hot=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Parameters\n",
    "learning_rate = 0.05\n",
    "training_epochs = 15\n",
    "batch_size = 100\n",
    "display_step = 1\n",
    "\n",
    "# Network Parameters\n",
    "n_hidden_1 = 784 # 1st layer number of features\n",
    "n_hidden_2 = 784 # 2nd layer number of features\n",
    "n_input = 784 # MNIST data input (img shape: 28*28)\n",
    "n_classes = 10 # MNIST total classes (0-9 digits)\n",
    "\n",
    "# tf Graph input\n",
    "x = tf.placeholder(\"float\", [None, n_input])\n",
    "y = tf.placeholder(\"float\", [None, n_classes])\n",
    "dropoutRate = tf.placeholder(tf.float32)\n",
    "is_training= tf.placeholder(tf.bool)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (1) Definition of scaled exponential linear units (SELUs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def selu(x):\n",
    "    with ops.name_scope('elu') as scope:\n",
    "        alpha = 1.6732632423543772848170429916717\n",
    "        scale = 1.0507009873554804934193349852946\n",
    "        return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (2) Definition of dropout variant for SNNs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def dropout_selu(x, rate, alpha= -1.7580993408473766, fixedPointMean=0.0, fixedPointVar=1.0, \n",
    "                 noise_shape=None, seed=None, name=None, training=False):\n",
    "    \"\"\"Dropout to a value with rescaling.\"\"\"\n",
    "\n",
    "    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):\n",
    "        keep_prob = 1.0 - rate\n",
    "        x = ops.convert_to_tensor(x, name=\"x\")\n",
    "        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:\n",
    "            raise ValueError(\"keep_prob must be a scalar tensor or a float in the \"\n",
    "                                             \"range (0, 1], got %g\" % keep_prob)\n",
    "        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name=\"keep_prob\")\n",
    "        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())\n",
    "\n",
    "        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name=\"alpha\")\n",
    "        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())\n",
    "\n",
    "        if tensor_util.constant_value(keep_prob) == 1:\n",
    "            return x\n",
    "\n",
    "        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)\n",
    "        random_tensor = keep_prob\n",
    "        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)\n",
    "        binary_tensor = math_ops.floor(random_tensor)\n",
    "        ret = x * binary_tensor + alpha * (1-binary_tensor)\n",
    "\n",
    "        a = tf.sqrt(fixedPointVar / (keep_prob *((1-keep_prob) * tf.pow(alpha-fixedPointMean,2) + fixedPointVar)))\n",
    "\n",
    "        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)\n",
    "        ret = a * ret + b\n",
    "        ret.set_shape(x.get_shape())\n",
    "        return ret\n",
    "\n",
    "    with ops.name_scope(name, \"dropout\", [x]) as name:\n",
    "        return utils.smart_cond(training,\n",
    "            lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),\n",
    "            lambda: array_ops.identity(x))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (3) Input data scaled to zero mean and unit variance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# (1) Scale input to zero mean and unit variance\n",
    "scaler = StandardScaler().fit(mnist.train.images)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Tensorboard\n",
    "logs_path = '~/tmp'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create model\n",
    "def multilayer_perceptron(x, weights, biases, rate, is_training):\n",
    "    # Hidden layer with SELU activation\n",
    "    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])\n",
    "    #netI_1 = layer_1\n",
    "    layer_1 = selu(layer_1)\n",
    "    layer_1 = dropout_selu(layer_1,rate, training=is_training)\n",
    "    \n",
    "    # Hidden layer with SELU activation\n",
    "    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n",
    "    #netI_2 = layer_2\n",
    "    layer_2 = selu(layer_2)\n",
    "    layer_2 = dropout_selu(layer_2,rate, training=is_training)\n",
    "\n",
    "    # Output layer with linear activation\n",
    "    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']\n",
    "    return out_layer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (4) Initialization with STDDEV of sqrt(1/n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Store layers weight & bias\n",
    "weights = {\n",
    "    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1],stddev=np.sqrt(1/n_input))),\n",
    "    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2],stddev=np.sqrt(1/n_hidden_1))),\n",
    "    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes],stddev=np.sqrt(1/n_hidden_2)))\n",
    "}\n",
    "biases = {\n",
    "    'b1': tf.Variable(tf.random_normal([n_hidden_1],stddev=0)),\n",
    "    'b2': tf.Variable(tf.random_normal([n_hidden_2],stddev=0)),\n",
    "    'out': tf.Variable(tf.random_normal([n_classes],stddev=0))\n",
    "}\n",
    "# Construct model\n",
    "pred = multilayer_perceptron(x, weights, biases, rate=dropoutRate, is_training=is_training)\n",
    "\n",
    "# Define loss and optimizer\n",
    "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))\n",
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)\n",
    "\n",
    " # Test model\n",
    "correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n",
    "# Calculate accuracy\n",
    "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
    "         \n",
    "# Initializing the variables\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Create a histogramm for weights\n",
    "tf.summary.histogram(\"weights2\", weights['h2'])\n",
    "tf.summary.histogram(\"weights1\", weights['h1'])\n",
    "\n",
    "# Create a summary to monitor cost tensor\n",
    "tf.summary.scalar(\"loss\", cost)\n",
    "# Create a summary to monitor accuracy tensor\n",
    "tf.summary.scalar(\"accuracy\", accuracy)\n",
    "# Merge all summaries into a single op\n",
    "merged_summary_op = tf.summary.merge_all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0001 cost= 0.383099532\n",
      "Train-Accuracy: 0.97 Train-Loss: 0.10093\n",
      "Validation-Accuracy: 0.931641 Val-Loss: 0.31581 \n",
      "\n",
      "Epoch: 0002 cost= 0.261214849\n",
      "Train-Accuracy: 0.95 Train-Loss: 0.245151\n",
      "Validation-Accuracy: 0.929688 Val-Loss: 0.26973 \n",
      "\n",
      "Epoch: 0003 cost= 0.207959975\n",
      "Train-Accuracy: 0.98 Train-Loss: 0.0699748\n",
      "Validation-Accuracy: 0.923828 Val-Loss: 0.254827 \n",
      "\n",
      "Epoch: 0004 cost= 0.170601225\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0443216\n",
      "Validation-Accuracy: 0.966797 Val-Loss: 0.134846 \n",
      "\n",
      "Epoch: 0005 cost= 0.145285159\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0269495\n",
      "Validation-Accuracy: 0.96875 Val-Loss: 0.110439 \n",
      "\n",
      "Epoch: 0006 cost= 0.124336535\n",
      "Train-Accuracy: 0.99 Train-Loss: 0.0192373\n",
      "Validation-Accuracy: 0.964844 Val-Loss: 0.176624 \n",
      "\n",
      "Epoch: 0007 cost= 0.106369379\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0164243\n",
      "Validation-Accuracy: 0.980469 Val-Loss: 0.0833913 \n",
      "\n",
      "Epoch: 0008 cost= 0.094885690\n",
      "Train-Accuracy: 0.99 Train-Loss: 0.0330377\n",
      "Validation-Accuracy: 0.972656 Val-Loss: 0.0719449 \n",
      "\n",
      "Epoch: 0009 cost= 0.084200106\n",
      "Train-Accuracy: 0.99 Train-Loss: 0.023436\n",
      "Validation-Accuracy: 0.964844 Val-Loss: 0.164996 \n",
      "\n",
      "Epoch: 0010 cost= 0.076172222\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0200319\n",
      "Validation-Accuracy: 0.970703 Val-Loss: 0.126767 \n",
      "\n",
      "Epoch: 0011 cost= 0.068200123\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0051565\n",
      "Validation-Accuracy: 0.972656 Val-Loss: 0.157592 \n",
      "\n",
      "Epoch: 0012 cost= 0.062415765\n",
      "Train-Accuracy: 0.98 Train-Loss: 0.0301026\n",
      "Validation-Accuracy: 0.976562 Val-Loss: 0.125026 \n",
      "\n",
      "Epoch: 0013 cost= 0.056047069\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0111594\n",
      "Validation-Accuracy: 0.96875 Val-Loss: 0.150638 \n",
      "\n",
      "Epoch: 0014 cost= 0.053823661\n",
      "Train-Accuracy: 1.0 Train-Loss: 0.0100787\n",
      "Validation-Accuracy: 0.976562 Val-Loss: 0.0863238 \n",
      "\n",
      "Epoch: 0015 cost= 0.048514629\n",
      "Train-Accuracy: 0.99 Train-Loss: 0.0266962\n",
      "Validation-Accuracy: 0.976562 Val-Loss: 0.0925855 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Launch the graph\n",
    "gpu_options = tf.GPUOptions(allow_growth=True)\n",
    "with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:\n",
    "    sess.run(init)\n",
    "\n",
    "    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())\n",
    "\n",
    "    # Training cycle\n",
    "    for epoch in range(training_epochs):\n",
    "        avg_cost = 0.\n",
    "        total_batch = int(mnist.train.num_examples/batch_size)\n",
    "        # Loop over all batches\n",
    "        for i in range(total_batch):\n",
    "            batch_x, batch_y = mnist.train.next_batch(batch_size)\n",
    "            batch_x = scaler.transform(batch_x)\n",
    "            # Run optimization op (backprop) and cost op (to get loss value)\n",
    "            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,\n",
    "                                                          y: batch_y, dropoutRate: 0.05, is_training:True})\n",
    "\n",
    "            # Compute average loss\n",
    "            avg_cost += c / total_batch\n",
    "        # Display logs per epoch step\n",
    "        if epoch % display_step == 0:\n",
    "            print (\"Epoch:\", '%04d' % (epoch+1), \"cost=\",\"{:.9f}\".format(avg_cost))\n",
    "            \n",
    "            accTrain, costTrain, summary = sess.run([accuracy, cost, merged_summary_op], \n",
    "                                                        feed_dict={x: batch_x, y: batch_y, \n",
    "                                                                   dropoutRate: 0.0, is_training:False})\n",
    "            summary_writer.add_summary(summary, epoch)\n",
    "            \n",
    "            print(\"Train-Accuracy:\", accTrain,\"Train-Loss:\", costTrain)\n",
    "\n",
    "            batch_x_test, batch_y_test = mnist.test.next_batch(512)\n",
    "            batch_x_test = scaler.transform(batch_x_test)\n",
    "\n",
    "            accTest, costVal = sess.run([accuracy, cost], feed_dict={x: batch_x_test, y: batch_y_test, \n",
    "                                                                   dropoutRate: 0.0, is_training:False})\n",
    "\n",
    "            print(\"Validation-Accuracy:\", accTest,\"Val-Loss:\", costVal,\"\\n\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf-alpha",
   "language": "python",
   "name": "tf-alpha"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
